---
title: "Final Replication Code"
author: "Noah Shenker, Madai Urteaga, Kirsten Walters"
date: "12/14/2020"
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

#setwd() as appropriate:
#setwd("/Users/kirsten/Desktop/Dropbox/Rep Paper/Data and Code/Data for Substantive Contributions/DataCleaned")

#Loading packages:
require(tidyverse)
require(ggplot2)
require(stargazer)
require(RFGLS)
require(lfe)
require(plm)
require(multiwayvcov)
require(lmtest)
require(estimatr)
require(mvtnorm)
require(xtable)
require(kable)
require(Amelia)
require(Hmisc)
require(broom)
require(furniture)
require(cowplot)

#Reading in the data
kimuli_final <- read.csv('kimuli_final.csv')
kimuli_final$LA_SSA <- as.factor(kimuli_final$LA_SSA)
```

## Table 1: Summary statistics

```{r Table 1}
kimuli_sum <- kimuli_final %>%
  select(elec_dist_1_5, ge_est,
           polity2_new, gini_pre_tt_new,
           homicide_rate_new, eth_frac_new, pr,
         compulsory, concurrent) %>% # select variables to summarise
  rename(elec.dist.1.5 = elec_dist_1_5, ge.est = ge_est,
         polity2.new = polity2_new, gini.pre.tt.new = gini_pre_tt_new,
         homicide.rate.new = homicide_rate_new, 
         eth.frac.new = eth_frac_new) %>% 
  summarise_all(list(min = ~ min(., na.rm = T), 
                      q25 = ~ quantile(., 0.25, na.rm = T), 
                      median = ~ median(., na.rm = T), 
                      q75 = ~ quantile(., 0.75, na.rm = T), 
                      max = ~ max(., na.rm = T),
                      mean = ~ mean(., na.rm = T), 
                      sd = ~ sd(., na.rm = T))) # calculating summary statistics

kimuli_sum_tidy <- kimuli_sum %>% gather(stat, val) %>%
  separate(stat, into = c("var", "stat"), sep = "_") %>%
  spread(stat, val) %>%
  select(var, min, q25, median, q75, max, mean, sd) %>% 
  mutate_if(is.numeric, ~ round(., digits = 2)) # gathering into a summary table

xtable(kimuli_sum_tidy) # creating LaTeX table of summary statistics
```

## Figure 1: coefficient plot comparing replication to updated model
Also includes setting up Amelia

```{r Figure 1}
##Filtering out collinear variables that would make matrix non-invertible:
kimuli_impute <- kimuli_final[, -c(8, 16:22)]
kimuli_impute <- kimuli_impute[, -c(16, 18)]
kimuli_impute <- kimuli_impute[, -c(17:20)]
kimuli_impute <- kimuli_impute[, -c(20:25)]
kimuli_impute <- kimuli_impute[, -c(26:30)]
kimuli_impute <- kimuli_impute[, -c(21:22, 24)]
kimuli_impute <- kimuli_impute[, -c(32:33, 35:37)]
kimuli_impute <- dplyr::select(kimuli_impute, -c("abs_red_gini", "tax_abs_income", "tax_goods_services", "gini_pre_tt_se_new"))

kimuli_impute <- as.data.frame(kimuli_impute)

##Setting seed and imputing after setting id variables:
set.seed(02138)
a.out <- amelia(kimuli_impute, m = 5, 
                idvars = c("country", "country_code", "source",
                           "Region", "low_mid_high", "LA_SSA"))

## Missingness map, summary of missing values
missmap(a.out)
summary(a.out)

# Running regressions on each imputation, then merging them
all_imputations <- bind_rows(unclass(a.out$imputations), .id = "m") %>%
  group_by(m) %>%
  nest()

models_imputations <- all_imputations %>%
  mutate(model = data %>% map(~ lm(elec_dist_1_5 ~ ge_est + pr +
                                     compulsory + concurrent +
                                     polity2_new + gini_pre_tt_new +
                                     homicide_rate_new + eth_frac_new,
                                   data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))

params <- models_imputations %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  spread(term, value) %>% 
  ungroup()

just_coefs <- params %>%
  filter(key == "estimate") %>%
  select(-m, -key)
just_ses <- params %>%
  filter(key == "std.error") %>%
  select(-m, -key)
coefs_melded <- mi.meld(just_coefs, just_ses)

model_degree_freedom <- models_imputations %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

melded_summary <- as.data.frame(cbind(t(coefs_melded$q.mi),
                                      t(coefs_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, model_degree_freedom),
         conf.high = estimate + std.error * qt(0.975, model_degree_freedom),
         p.value = 2 * pt(abs(statistic), model_degree_freedom, lower.tail = FALSE)) %>% 
  mutate_if(is.numeric, round, 3)

## Calculating adjusted R2
r2s <- models_imputations %>%
  unnest(glance) %>%
  select(m, adj.r.squared, df.residual) %>%
  mutate(R = sqrt(adj.r.squared),  # Regular R
         Q = 0.5 * log((R + 1) / (1 - R)),  # Fisher z-transformation
         se = 1 / df.residual)  # R2 variance
Q_melded <- mi.meld(as.matrix(r2s$Q), as.matrix(r2s$se))
r2_melded <- ((exp(2 * Q_melded$q.mi) - 1) / (1 + exp(2 * Q_melded$q.mi)))^2

## Ordering output for plotting
order <- c("eth_frac_new", "homicide_rate_new", "gini_pre_tt_new", 
           "polity2_new", "concurrent", "compulsory", "pr", "ge_est")

new_melded_order <- melded_summary %>% 
  filter(term != "(Intercept)") %>% 
  slice(match(order, term))
new_melded_order$term <- factor(new_melded_order$term, levels = order)
new_melded_order$term <- recode(new_melded_order$term, 
                            eth_frac_new = "Ethnic fractionalization",
                            homicide_rate_new = "Homicide rate",
                            gini_pre_tt_new = "Gini (pre-transfer)",
                            polity2_new = "Polity",
                            concurrent = "Concurrent elections",
                            compulsory = "Compulsory voting",
                            pr = "PR",
                            ge_est = "Government effectiveness")
                          
new_melded_order <- new_melded_order[, c(1,2,5,6)]
new_melded_order$replication <- 0

## Putting replication in same format:
replication <- lm(elec_dist_1_5 ~ ge_est + pr + compulsory + concurrent + polity_score +
                  giniall_gross + homicide + ef, 
                  data = kimuli_final)
replication_melded <- data.frame(confint(replication), replication$coefficients)
replication_melded <- rename(replication_melded,
                             conf.low = `X2.5..`,
                             conf.high = `X97.5..`,
                             estimate = replication.coefficients)
replication_melded$term <- c("(Intercept)", "ge_est", "pr", "compulsory",
                             "concurrent", "polity_score", "giniall_gross",
                             "homicide", "ef")
replication_melded$replication <- 1
replication_melded <- replication_melded[, c(4, 3, 1, 2, 5)]

## Ordering replication:

order_replication <- c("ef", "homicide", "giniall_gross", 
                       "polity_score", "concurrent", "compulsory", "pr", "ge_est")

replication_melded_order <- replication_melded %>% 
  filter(term != "(Intercept)") %>% 
  slice(match(order_replication, term))
replication_melded_order$term <- factor(replication_melded_order$term, levels = order_replication)
replication_melded_order$term <- recode(replication_melded_order$term, 
                                   ef = "Ethnic fractionalization",
                                   homicide = "Homicide rate",
                                   giniall_gross = "Gini (pre-transfer)",
                                   polity_score = "Polity",
                                   concurrent = "Concurrent elections",
                                   compulsory = "Compulsory voting",
                                   pr = "PR",
                                   ge_est = "Government effectiveness")

## Merging datasets and creating coefficient plot:
coefplot_repvsnew <- rbind(replication_melded_order, new_melded_order)
coefplot_repvsnew$replication <- as.factor(coefplot_repvsnew$replication) 

dodge <- position_dodge(width=0.8)  
ggplot(data = coefplot_repvsnew, 
       aes(x = estimate, y = term, color = replication))+
  geom_point(position=dodge)+
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.4, position = dodge)+
  scale_color_discrete(name = "Model", breaks = c("1", "0"), labels = c("Replication", "Updated"))+
  geom_vline(aes(xintercept = 0), linetype = 'dashed')+
  theme_minimal()+
  xlab("Estimated effect on electoral distance")+
  ylab("")
```

## Table 2: Observations by region

```{r Table 2}
obsbyregion <- kimuli_final %>% 
  group_by(Region) %>% 
  summarise(obs = n())

xtable(obsbyregion) # Creating LaTex table
```

## Figure 2: Scatterplot of government effectiveness and electoral distance by region

```{r Figure 2}
#Noting regions with most points for geom_smooth
impt_regions <- kimuli_final %>% 
  filter(Region %in% c("Sub-Saharan Africa",
                       "Europe and Central Asia",
                       "Latin America and the Caribbean")) 

ggplot()+
  geom_point(data = kimuli_final, 
             aes(x = ge_est, y = elec_dist_1_5, color = Region))+
  geom_smooth(data = impt_regions, 
              aes(x = ge_est, y = elec_dist_1_5, color = Region),
              method = "lm", se = FALSE)+
  scale_color_manual(labels=c("E Asia and Pacific", "Europe and Cent Asia", "Lat Am and Caribbean", "Mid East and N Africa", "North America", "South Asia", "Sub-Saharan Africa"),
                     values=c("#666666", "#0000FF", "#00FF00", "#999999", "#CC99CC", "#CCCCFF", "#FF3300"))+
  xlab("Government Effectiveness")+
  ylab("Electoral Distance")+
  theme_minimal()
```

## Figure 3: Coefficient plot comparing models with and without region dummy

```{r Figure 3}
## Mutating previous melded_summary output for base model to fit new plot:
order <- c("eth_frac_new", "homicide_rate_new", "gini_pre_tt_new", 
           "polity2_new", "concurrent", "compulsory", "pr", "ge_est", "(Intercept)")

melded_order <- melded_summary %>% 
  slice(match(order, term))
melded_order$term <- factor(melded_order$term, levels = order)
melded_order$term <- recode(melded_order$term, 
                            eth_frac_new = "Ethnic fractionalization",
                            homicide_rate_new = "Homicide rate",
                            gini_pre_tt_new = "Gini (pre-transfer)",
                            polity2_new = "Polity",
                            concurrent = "Concurrent elections",
                            compulsory = "Compulsory voting",
                            pr = "PR",
                            ge_est = "Government effectiveness",
                            `(Intercept)` = "LA SSA dummy")
melded_order$la_ssa <- 0
melded_order[9, 2:7] <- NA

# Running regressions on each imputation, then merging them (with regional dummy)
models_imputations_LA_SSA <- all_imputations %>%
  mutate(model = data %>% map(~ lm(elec_dist_1_5 ~ ge_est + pr +
                                     compulsory + concurrent +
                                     polity2_new + gini_pre_tt_new +
                                     homicide_rate_new + eth_frac_new +
                                     LA_SSA,
                                   data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))

params_LA_SSA <- models_imputations_LA_SSA %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  spread(term, value) %>% 
  ungroup()

just_coefs_LA_SSA <- params_LA_SSA %>%
  filter(key == "estimate") %>%
  select(-m, -key)
just_ses_LA_SSA <- params_LA_SSA %>%
  filter(key == "std.error") %>%
  select(-m, -key)
coefs_melded_LA_SSA <- mi.meld(just_coefs_LA_SSA, just_ses_LA_SSA)

model_degree_freedom_LA_SSA <- models_imputations_LA_SSA %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

melded_summary_LA_SSA <- as.data.frame(cbind(t(coefs_melded_LA_SSA$q.mi),
                                             t(coefs_melded_LA_SSA$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, model_degree_freedom),
         conf.high = estimate + std.error * qt(0.975, model_degree_freedom),
         p.value = 2 * pt(abs(statistic), model_degree_freedom, lower.tail = FALSE)) %>% 
  mutate_if (is.numeric, round, 3)

## Calculating adjusted R2
r2s <- models_imputations_LA_SSA %>%
  unnest(glance) %>%
  select(m, adj.r.squared, df.residual) %>%
  mutate(R = sqrt(adj.r.squared),  # Regular R
         Q = 0.5 * log((R + 1) / (1 - R)),  # Fisher z-transformation
         se = 1 / df.residual)  # R2 variance
Q_melded <- mi.meld(as.matrix(r2s$Q), as.matrix(r2s$se))
r2_melded <- ((exp(2 * Q_melded$q.mi) - 1) / (1 + exp(2 * Q_melded$q.mi)))^2

## Ordering variables for plot
order_LA_SSA <- c("eth_frac_new", "homicide_rate_new", "gini_pre_tt_new", 
           "polity2_new", "concurrent", "compulsory", "pr", "ge_est", "LA_SSA1")

melded_order_LA_SSA <- melded_summary_LA_SSA %>% 
  filter(term != "(Intercept)") %>% 
  slice(match(order_LA_SSA, term))
melded_order_LA_SSA$term <- factor(melded_order_LA_SSA$term, levels = order_LA_SSA)
melded_order_LA_SSA$term <- recode(melded_order_LA_SSA$term, 
                            eth_frac_new = "Ethnic fractionalization",
                            homicide_rate_new = "Homicide rate",
                            gini_pre_tt_new = "Gini (pre-transfer)",
                            polity2_new = "Polity",
                            concurrent = "Concurrent elections",
                            compulsory = "Compulsory voting",
                            pr = "PR",
                            ge_est = "Government effectiveness",
                            LA_SSA1 = "LA SSA dummy")
melded_order_LA_SSA$la_ssa <- 1

## Merging and plotting data
coefplot <- rbind(melded_order, melded_order_LA_SSA)
coefplot$la_ssa <- as.factor(coefplot$la_ssa)

dodge <- position_dodge(width=0.8)  
ggplot(data = coefplot, 
       aes(x = estimate, y = term, color = la_ssa))+
  geom_point(position=dodge)+
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.4, position = dodge)+
  scale_color_discrete(name = "Model", labels = c("Without \nregion", "With \nregion"))+
  geom_vline(aes(xintercept = 0), linetype = 'dashed')+
  theme_minimal()+
  xlab("Estimated effect on electoral distance")+
  ylab("")
```

## Figure 4: First difference plot of electoral distance between LA/SSA and non-LA/SSA countries over range of government effectiveness
Holding covariates at their observed values.

```{r Figure 4}
LA_SSA <- lm(elec_dist_1_5 ~ ge_est + pr + compulsory + concurrent + polity2_new
             + gini_pre_tt_new + homicide_rate_new + eth_frac_new + LA_SSA +
             LA_SSA*ge_est, 
             data = kimuli_final)

simulate_CI <- function(fitted, nsims = 5000) {
  # Drawing from the distribution of the parameters
  coef_simed <- rmvnorm(nsims, mean = coef(fitted), sigma = vcov(fitted))
  X <- model.matrix(fitted)
  # Calculate the predicted probabilities for LA/SSA vs. not LA/SSA countries
  # ..as we vary government effectiveness
  CI_simed <- lapply(seq(min(kimuli_final$ge_est, na.rm = TRUE), max(kimuli_final$ge_est, na.rm = TRUE), length.out = 25),function(i) {
    X_low <- X
    X_high <- X
    X_low[, "ge_est"] <- i
    X_high[, "ge_est"] <- i
    X_low[, "LA_SSA1"] <- 0
    X_high[, "LA_SSA1"] <- 1
    # Set the interaction
    X_low[, "ge_est:LA_SSA1"] <- 0*i
    X_high[, "ge_est:LA_SSA1"] <- 1*i
    # Calculate difference
    pr_low <- colMeans(X_low %*% t(coef_simed))
    pr_high <- colMeans(X_high %*% t(coef_simed))
    fd <- pr_high - pr_low
    # Reformat the result
    res_q <- quantile(fd, prob = c(0.025, 0.5, 0.975))
    tibble(ge_est = i, 
           p025 = res_q[1], 
           median = res_q[2], 
           p975 = res_q[3], 
           mean = mean(fd)) -> res
    return(res)})%>% bind_rows()
  return(CI_simed)
}

CI_sim_LA_SSA <- simulate_CI(LA_SSA)

ggplot(data = CI_sim_LA_SSA, aes(x = ge_est, y = mean))+
  geom_hline(yintercept = 0, linetype = "dashed")+
  geom_line()+
  geom_ribbon(aes(ymin = p025, ymax = p975), alpha = 0.25, color = "blue", fill = "blue")+
  xlab("Government Effectiveness")+
  ylab("First Difference")
```

## Figure 5: Scatterplots showing relationship between clientelism and government effectiveness or electoral distance by region.

```{r Figure 5}
## Plot of government effectiveness and clientelism
clientelism_ge <- ggplot()+
  geom_point(data = kimuli_final, 
             aes(x = clientelism_index, y = gover_effect, color = Region))+
  scale_color_manual(values=c("#666666", "#0000FF", "#00FF00", "#999999", "#CC99CC", "#CCCCFF", "#FF3300"))+
  xlab("Clientelism Index")+
  ylab("Government Effectiveness")+
  theme_minimal()

clientelism_ge <- clientelism_ge + theme(legend.position = "none")

## Plot of electoral distance and clientelism
clientelism_elecdist <- ggplot()+
  geom_point(data = kimuli_final, 
             aes(x = clientelism_index, y = elec_dist_1_5, color = Region))+
  scale_color_manual(labels=c("E Asia and Pacific", "Europe and Cent Asia", "Lat Am and Caribbean", "Mid East and N Africa", "North America", "South Asia", "Sub-Saharan Africa"),
                     values=c("#666666", "#0000FF", "#00FF00", "#999999", "#CC99CC", "#CCCCFF", "#FF3300"))+
  xlab("Clientelism Index")+
  ylab("Electoral Distance")+
  theme_minimal()

## Arranging plots in grid
plot_grid(clientelism_ge, clientelism_elecdist, 
          align = "h", ncol = 2, rel_widths = c(1.12, 2.07))
```

## Figure 6: Coeffient plot of baseline model, model with clientelism, and model with clientelism and region.

```{r Figure 6}
## Model with clientelism and region
models_imputations_LA_SSA <- all_imputations %>%
  mutate(model = data %>% map(~ lm(elec_dist_1_5 ~ ge_est + pr +
                                     compulsory + concurrent +
                                     polity2_new + gini_pre_tt_new +
                                     homicide_rate_new + eth_frac_new +
                                     clientelism_index + LA_SSA,
                                   data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))

params_LA_SSA <- models_imputations_LA_SSA %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  spread(term, value) %>% 
  ungroup()

just_coefs_LA_SSA <- params_LA_SSA %>%
  filter(key == "estimate") %>%
  select(-m, -key)
just_ses_LA_SSA <- params_LA_SSA %>%
  filter(key == "std.error") %>%
  select(-m, -key)
coefs_melded_LA_SSA <- mi.meld(just_coefs_LA_SSA, just_ses_LA_SSA)

model_degree_freedom_LA_SSA <- models_imputations_LA_SSA %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

melded_summary_LA_SSA <- as.data.frame(cbind(t(coefs_melded_LA_SSA$q.mi),
                                             t(coefs_melded_LA_SSA$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, model_degree_freedom),
         conf.high = estimate + std.error * qt(0.975, model_degree_freedom),
         p.value = 2 * pt(abs(statistic), model_degree_freedom, lower.tail = FALSE)) %>% 
  mutate_if(is.numeric, round, 3)

## Determining R2
r2s <- models_imputations_LA_SSA %>%
  unnest(glance) %>%
  select(m, adj.r.squared, df.residual) %>%
  mutate(R = sqrt(adj.r.squared),  # Regular R
         Q = 0.5 * log((R + 1) / (1 - R)),  # Fisher z-transformation
         se = 1 / df.residual)  # R2 variance
Q_melded <- mi.meld(as.matrix(r2s$Q), as.matrix(r2s$se))
r2_melded <- ((exp(2 * Q_melded$q.mi) - 1) / (1 + exp(2 * Q_melded$q.mi)))^2

order_LA_SSA <- c("eth_frac_new", "homicide_rate_new", "gini_pre_tt_new", 
                  "polity2_new", "concurrent", "compulsory", "pr", "ge_est", "clientelism_index", "LA_SSA1")

## Ordering model output for plotting
melded_order_LA_SSA <- melded_summary_LA_SSA %>% 
  filter(term != "(Intercept)") %>% 
  slice(match(order_LA_SSA, term))
melded_order_LA_SSA$term <- factor(melded_order_LA_SSA$term, levels = order_LA_SSA)
melded_order_LA_SSA$term <- recode(melded_order_LA_SSA$term, 
                                   eth_frac_new = "Ethnic fractionalization",
                                   homicide_rate_new = "Homicide rate",
                                   gini_pre_tt_new = "Gini (pre-transfer)",
                                   polity2_new = "Polity",
                                   concurrent = "Concurrent elections",
                                   compulsory = "Compulsory voting",
                                   pr = "PR",
                                   ge_est = "Government effectiveness",
                                   clientelism_index = "Clientelism",
                                   LA_SSA1 = "LA SSA dummy")
melded_order_LA_SSA$la_ssa <- 1

## Ordering baseline model output for plotting
order <- c("eth_frac_new", "homicide_rate_new", "gini_pre_tt_new", 
           "polity2_new", "concurrent", "compulsory", "pr", "ge_est", "clientelism_index", "(Intercept)")

melded_order <- melded_summary %>% 
  add_row(term = "clientelism_index", estimate = NA,
          std.error = NA, statistic = NA, conf.low = NA,
          conf.high = NA, p.value = NA) %>% 
  slice(match(order, term))
melded_order$term <- factor(melded_order$term, levels = order)
melded_order$term <- recode(melded_order$term, 
                            eth_frac_new = "Ethnic fractionalization",
                            homicide_rate_new = "Homicide rate",
                            gini_pre_tt_new = "Gini (pre-transfer)",
                            polity2_new = "Polity",
                            concurrent = "Concurrent elections",
                            compulsory = "Compulsory voting",
                            pr = "PR",
                            ge_est = "Government effectiveness",
                            clientelism_index = "Clientelism",
                            `(Intercept)` = "LA SSA dummy")
melded_order[10, 2:7] <- NA
melded_order$la_ssa <- 0

## Model with clientelism
models_imputations_client <- all_imputations %>%
  mutate(model = data %>% map(~ lm(elec_dist_1_5 ~ ge_est + pr +
                                     compulsory + concurrent +
                                     polity2_new + gini_pre_tt_new +
                                     homicide_rate_new + eth_frac_new +
                                     clientelism_index,
                                   data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))

params <- models_imputations_client %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  spread(term, value) %>% 
  ungroup()

just_coefs <- params %>%
  filter(key == "estimate") %>%
  select(-m, -key)
just_ses <- params %>%
  filter(key == "std.error") %>%
  select(-m, -key)
coefs_melded <- mi.meld(just_coefs, just_ses)

model_degree_freedom <- models_imputations %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

melded_summary <- as.data.frame(cbind(t(coefs_melded$q.mi),
                                             t(coefs_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, model_degree_freedom),
         conf.high = estimate + std.error * qt(0.975, model_degree_freedom),
         p.value = 2 * pt(abs(statistic), model_degree_freedom, lower.tail = FALSE)) %>% 
  mutate_if(is.numeric, round, 3)

## Calculating R2
r2s <- models_imputations_client %>%
  unnest(glance) %>%
  select(m, adj.r.squared, df.residual) %>%
  mutate(R = sqrt(adj.r.squared),  # Regular R
         Q = 0.5 * log((R + 1) / (1 - R)),  # Fisher z-transformation
         se = 1 / df.residual)  # R2 variance
Q_melded <- mi.meld(as.matrix(r2s$Q), as.matrix(r2s$se))
r2_melded <- ((exp(2 * Q_melded$q.mi) - 1) / (1 + exp(2 * Q_melded$q.mi)))^2

## Ordering output of clientelism model for plotting
melded_order_client <- melded_summary %>% 
  slice(match(order, term))
melded_order_client$term <- factor(melded_order_client$term, levels = order)
melded_order_client$term <- recode(melded_order_client$term, 
                                   eth_frac_new = "Ethnic fractionalization",
                                   homicide_rate_new = "Homicide rate",
                                   gini_pre_tt_new = "Gini (pre-transfer)",
                                   polity2_new = "Polity",
                                   concurrent = "Concurrent elections",
                                   compulsory = "Compulsory voting",
                                   pr = "PR",
                                   ge_est = "Government effectiveness",
                                   clientelism_index = "Clientelism",
                                   `(Intercept)` = "LA SSA dummy")
melded_order_client[10, 2:7] <- NA
melded_order_client$la_ssa <- 2

## Merging and plotting
coefplot_client <- rbind(melded_order, melded_order_LA_SSA, melded_order_client)
coefplot_client$la_ssa <- as.factor(coefplot_client$la_ssa)

dodge <- position_dodge(width=0.8)  
ggplot(data = coefplot_client, 
       aes(x = estimate, y = term, color = la_ssa))+
  geom_point(position=dodge)+
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.4, position = dodge)+
  scale_color_discrete(name = "Model", labels = c("Baseline", "With \nclientelism", "With\nclientelism \nand region"))+
  geom_vline(aes(xintercept = 0), linetype = 'dashed')+
  theme_minimal()+
  xlab("Estimated effect on electoral distance")+
  ylab("")
```

## Figure 7: Scatterplots showing relationship between clientelism and government effectiveness or electoral distance by region.

```{r Figure 7}
ggplot()+
  geom_point(data = kimuli_final %>% 
               group_by(country_code, Region) %>% 
               summarise(mean_red = mean(rel_red_gini),
                         mean_ge = mean(ge_est)), 
             aes(x = mean_red, y = mean_ge, color = Region))+
  geom_smooth(data = impt_regions %>% 
               group_by(country_code, Region) %>% 
               summarise(mean_red = mean(rel_red_gini),
                         mean_ge = mean(ge_est)), 
             aes(x = mean_red, y = mean_ge, color = Region), method = "lm", se = F)+
  scale_color_manual(labels=c("E Asia and Pacific", "Europe and Cent Asia", "Lat Am and Caribbean", "Mid East and N Africa", "North America", "South Asia", "Sub-Saharan Africa"),
                     values=c("#666666", "#0000FF", "#00FF00", "#999999", "#CC99CC", "#CCCCFF", "#FF3300"))+
  xlab("Mean Relative Redistribution")+
  ylab("Mean Government Effectiveness")+
  theme_minimal()+
  xlim(-0.1, 0.5) # Removing significant outlier of Ukraine
```

## Figure 8: Coefficient plot comparing baseline model, model including relative redistribution, and model including relative redistribution and region.

```{r Figure 8}
## Model with region and relative redistribution
models_imputations_LA_SSA <- all_imputations %>%
  mutate(model = data %>% map(~ lm(elec_dist_1_5 ~ ge_est + pr +
                                     compulsory + concurrent +
                                     polity2_new + gini_pre_tt_new +
                                     homicide_rate_new + eth_frac_new +
                                     rel_red_gini + LA_SSA,
                                   data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))

params_LA_SSA <- models_imputations_LA_SSA %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  spread(term, value) %>% 
  ungroup()

just_coefs_LA_SSA <- params_LA_SSA %>%
  filter(key == "estimate") %>%
  select(-m, -key)
just_ses_LA_SSA <- params_LA_SSA %>%
  filter(key == "std.error") %>%
  select(-m, -key)
coefs_melded_LA_SSA <- mi.meld(just_coefs_LA_SSA, just_ses_LA_SSA)

model_degree_freedom_LA_SSA <- models_imputations_LA_SSA %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

melded_summary_LA_SSA <- as.data.frame(cbind(t(coefs_melded_LA_SSA$q.mi),
                                             t(coefs_melded_LA_SSA$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, model_degree_freedom),
         conf.high = estimate + std.error * qt(0.975, model_degree_freedom),
         p.value = 2 * pt(abs(statistic), model_degree_freedom, lower.tail = FALSE)) %>% 
  mutate_if(is.numeric, round, 3)

## Computing R2
r2s <- models_imputations_LA_SSA %>%
  unnest(glance) %>%
  select(m, adj.r.squared, df.residual) %>%
  mutate(R = sqrt(adj.r.squared),  # Regular R
         Q = 0.5 * log((R + 1) / (1 - R)),  # Fisher z-transformation
         se = 1 / df.residual)  # R2 variance
Q_melded <- mi.meld(as.matrix(r2s$Q), as.matrix(r2s$se))
r2_melded <- ((exp(2 * Q_melded$q.mi) - 1) / (1 + exp(2 * Q_melded$q.mi)))^2

order_LA_SSA <- c("eth_frac_new", "homicide_rate_new", "gini_pre_tt_new", 
                  "polity2_new", "concurrent", "compulsory", "pr", "ge_est", "rel_red_gini", "LA_SSA1")

# Ordering output for plotting
melded_order_LA_SSA <- melded_summary_LA_SSA %>% 
  filter(term != "(Intercept)") %>% 
  slice(match(order_LA_SSA, term))
melded_order_LA_SSA$term <- factor(melded_order_LA_SSA$term, levels = order_LA_SSA)
melded_order_LA_SSA$term <- recode(melded_order_LA_SSA$term, 
                                   eth_frac_new = "Ethnic fractionalization",
                                   homicide_rate_new = "Homicide rate",
                                   gini_pre_tt_new = "Gini (pre-transfer)",
                                   polity2_new = "Polity",
                                   concurrent = "Concurrent elections",
                                   compulsory = "Compulsory voting",
                                   pr = "PR",
                                   ge_est = "Government effectiveness",
                                   rel_red_gini = "Relative redistribution",
                                   LA_SSA1 = "LA SSA dummy")
melded_order_LA_SSA$la_ssa <- 1

## Re-running baseline model for plotting
models_imputations <- all_imputations %>%
  mutate(model = data %>% map(~ lm(elec_dist_1_5 ~ ge_est + pr +
                                     compulsory + concurrent +
                                     polity2_new + gini_pre_tt_new +
                                     homicide_rate_new + eth_frac_new,
                                   data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))

params <- models_imputations %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  spread(term, value) %>% 
  ungroup()

just_coefs <- params %>%
  filter(key == "estimate") %>%
  select(-m, -key)
just_ses <- params %>%
  filter(key == "std.error") %>%
  select(-m, -key)
coefs_melded <- mi.meld(just_coefs, just_ses)

model_degree_freedom <- models_imputations %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

melded_summary <- as.data.frame(cbind(t(coefs_melded$q.mi),
                                      t(coefs_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, model_degree_freedom),
         conf.high = estimate + std.error * qt(0.975, model_degree_freedom),
         p.value = 2 * pt(abs(statistic), model_degree_freedom, lower.tail = FALSE)) %>% 
  mutate_if(is.numeric, round, 3)

## Ordering output for plotting
order <- c("eth_frac_new", "homicide_rate_new", "gini_pre_tt_new", 
           "polity2_new", "concurrent", "compulsory", "pr", "ge_est", "rel_red_gini", "(Intercept)")

melded_order <- melded_summary %>% 
  add_row(term = "rel_red_gini", estimate = NA,
          std.error = NA, statistic = NA, conf.low = NA,
          conf.high = NA, p.value = NA) %>% 
  slice(match(order, term))
melded_order$term <- factor(melded_order$term, levels = order)
melded_order$term <- recode(melded_order$term, 
                            eth_frac_new = "Ethnic fractionalization",
                            homicide_rate_new = "Homicide rate",
                            gini_pre_tt_new = "Gini (pre-transfer)",
                            polity2_new = "Polity",
                            concurrent = "Concurrent elections",
                            compulsory = "Compulsory voting",
                            pr = "PR",
                            ge_est = "Government effectiveness",
                            rel_red_gini = "Relative redistribution",
                            `(Intercept)` = "LA SSA dummy")
melded_order[10, 2:7] <- NA
melded_order$la_ssa <- 0

## Model with relative redistribution
models_imputations_relred <- all_imputations %>%
  mutate(model = data %>% map(~ lm(elec_dist_1_5 ~ ge_est + pr +
                                     compulsory + concurrent +
                                     polity2_new + gini_pre_tt_new +
                                     homicide_rate_new + eth_frac_new +
                                     rel_red_gini,
                                   data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))

params <- models_imputations_relred %>%
  unnest(tidied) %>%
  select(m, term, estimate, std.error) %>%
  gather(key, value, estimate, std.error) %>%
  spread(term, value) %>% 
  ungroup()

just_coefs <- params %>%
  filter(key == "estimate") %>%
  select(-m, -key)
just_ses <- params %>%
  filter(key == "std.error") %>%
  select(-m, -key)
coefs_melded <- mi.meld(just_coefs, just_ses)

model_degree_freedom <- models_imputations %>%
  unnest(glance) %>%
  filter(m == "imp1") %>%
  pull(df.residual)

melded_summary <- as.data.frame(cbind(t(coefs_melded$q.mi),
                                      t(coefs_melded$se.mi))) %>%
  magrittr::set_colnames(c("estimate", "std.error")) %>%
  mutate(term = rownames(.)) %>%
  select(term, everything()) %>%
  mutate(statistic = estimate / std.error,
         conf.low = estimate + std.error * qt(0.025, model_degree_freedom),
         conf.high = estimate + std.error * qt(0.975, model_degree_freedom),
         p.value = 2 * pt(abs(statistic), model_degree_freedom, lower.tail = FALSE)) %>% 
  mutate_if(is.numeric, round, 3)

## Computing R2
r2s <- models_imputations_relred %>%
  unnest(glance) %>%
  select(m, adj.r.squared, df.residual) %>%
  mutate(R = sqrt(adj.r.squared),  # Regular R
         Q = 0.5 * log((R + 1) / (1 - R)),  # Fisher z-transformation
         se = 1 / df.residual)  # R2 variance
Q_melded <- mi.meld(as.matrix(r2s$Q), as.matrix(r2s$se))
r2_melded <- ((exp(2 * Q_melded$q.mi) - 1) / (1 + exp(2 * Q_melded$q.mi)))^2

## Ordering output for plotting
melded_order_relred <- melded_summary %>% 
  slice(match(order, term))
melded_order_relred$term <- factor(melded_order_relred$term, levels = order)
melded_order_relred$term <- recode(melded_order_relred$term, 
                                   eth_frac_new = "Ethnic fractionalization",
                                   homicide_rate_new = "Homicide rate",
                                   gini_pre_tt_new = "Gini (pre-transfer)",
                                   polity2_new = "Polity",
                                   concurrent = "Concurrent elections",
                                   compulsory = "Compulsory voting",
                                   pr = "PR",
                                   ge_est = "Government effectiveness",
                                   rel_red_gini = "Relative redistribution",
                                   `(Intercept)` = "LA SSA dummy")
melded_order_relred[10, 2:7] <- NA
melded_order_relred$la_ssa <- 2

## Merging and plotting
coefplot_relred <- rbind(melded_order, melded_order_LA_SSA, melded_order_relred)
coefplot_relred$la_ssa <- as.factor(coefplot_relred$la_ssa)

dodge <- position_dodge(width=0.8)  
ggplot(data = coefplot_relred, 
       aes(x = estimate, y = term, color = la_ssa))+
  geom_point(position=dodge)+
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.4, position = dodge)+
  scale_color_discrete(name = "Model", labels = c("Baseline", "With\nrelative \nredistribution \nand region", "With \nrelative \nredistribution"))+
  geom_vline(aes(xintercept = 0), linetype = 'dashed')+
  theme_minimal()+
  xlab("Estimated effect on electoral distance")+
  ylab("")
```
